Load libraries

knitr::opts_chunk$set(echo = TRUE)

list.of.packages <- c("gsheet", "tidyverse", "janitor", "plotly", "glmmTMB", "metafor", "broom.mixed", "car") #add new libraries here 

new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)

# Load all libraries 
lapply(list.of.packages, FUN = function(X) {
  do.call("require", list(X)) 
})
sessionInfo()

Prepare data

NOTE: data is read directly from the GoogleSheet using a share link that was set to “anyone with a link can view”

# Read in data from GoogleSheet 
data.fert <- as_tibble(gsheet2tbl('https://docs.google.com/spreadsheets/d/111SuH548Et6HDckjbQjtYk-B8A5VfNkUYZgcUNRPxxY/edit?usp=sharing'))
## Warning: Missing column names filled in: 'X22' [22]
#replace NR (not reported) with NA and convert columns to factor / numeric where needed 
data.fert <- data.fert %>% 
  na_if("NR") %>%     
  mutate_at(c('Phylum', 'Study', 'Taxonomic Group', 'Common name', 'Latin name', 'Error statistic'), as.factor) %>%
  mutate_at(c('pH Experimental', 'pH Control', 'pCO2 Experimental', 'pCO2 Control', 'Ave. Fert. % @ pH', 'Error % @ pH', '# Trials @ pH', 'Insemination time'), as.numeric) %>%
  clean_names()  # fill in spaces with underscores for column names 
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
#data.fert %>% select(study, ave_fert_percent_p_h, error_percent_p_h, error_statistic, number_trials_p_h) %>% View()

Explore data with figures

ggplotly(
data.fert %>%
ggplot(mapping=aes(x=p_h_experimental, y=ave_fert_percent_p_h, group=taxonomic_group, col=taxonomic_group, text=`common_name`)) + 
  geom_point(size=1.5, width=0.02) +
  facet_wrap(~phylum, scale="free") +
  geom_smooth(method="lm", se=TRUE, aes(fill=taxonomic_group)) +
  ggtitle("Fertilization Rate ~ pH exposure by phylum, linear") +
  theme_minimal() 
)
## Warning: Ignoring unknown parameters: width
## Warning: Removed 25 rows containing non-finite values (stat_smooth).
ggplotly(
data.fert %>%
ggplot(mapping=aes(x=p_h_experimental, y=ave_fert_percent_p_h, group=taxonomic_group, col=taxonomic_group, text=`common_name`)) + 
  geom_point(size=1, width=0.02) +
  facet_wrap(~phylum, scale="free") +
  geom_smooth(method="lm", se=TRUE, formula=y ~ poly(x, 2, raw=TRUE), aes(fill=taxonomic_group)) +
  ggtitle("Fertilization Rate ~ pH exposure by phylum, polynomial") +
  theme_minimal()
)
## Warning: Ignoring unknown parameters: width

## Warning: Removed 25 rows containing non-finite values (stat_smooth).
## Warning in predict.lm(model, newdata = new_data_frame(list(x = xseq)), se.fit =
## se, : prediction from a rank-deficient fit may be misleading

Convert all error values to separate SE & SD columns

95% Confidence Interval

Upper 95% CI = Mean + 1.96*SE; I recorded the difference between the upper 95%CI and the mean (Upper 95%CI - Mean). To convert I will use:

SE = (Upper 95%CI - Mean) / 1.96
SD = ((Upper 95%CI - Mean) / 1.96) * sqrt(n)

Standard Deviation / Standard Error conversions

SE= SD/sqrt(n)
SD = SE*sqrt(n)
where n=sample size

data.fert <- data.fert %>% 
  mutate(SE =  case_when(error_statistic == "SD" ~ error_percent_p_h/sqrt(number_trials_p_h), 
         error_statistic == "95% CI" ~ error_percent_p_h/1.96,
         is.na(error_statistic) ~ error_percent_p_h, 
         error_statistic == "SE" ~ error_percent_p_h)) 

data.fert <- data.fert %>% 
  mutate(SD =  case_when(error_statistic == "SE" ~ error_percent_p_h*sqrt(number_trials_p_h), 
         error_statistic == "95% CI" ~ (error_percent_p_h/1.96)*sqrt(number_trials_p_h),
         is.na(error_statistic) ~ error_percent_p_h, 
         error_statistic == "SD" ~ error_percent_p_h)) 

Calculate delta pH (pH experimental - pH control)

data.fert <- data.fert %>% 
  mutate(pH_delta = p_h_control-p_h_experimental)

Convert % fertilization data to proportion fertilized, and replace any values >1 (aka 100% fertilized) with 1

data.fert <- data.fert %>% 
  mutate(ave_fert_proport = case_when(ave_fert_percent_p_h <= 100 ~ ave_fert_percent_p_h/100,
                                      ave_fert_percent_p_h > 100 ~ 1))

Inspect data

data.fert$ave_fert_proport %>% hist()

ggplot(data.fert, aes(group=phylum, col=phylum)) + geom_density(aes(ave_fert_proport))
## Warning: Removed 13 rows containing non-finite values (stat_density).

# How many studies per ~phylum? 
data.fert %>%
  select(phylum, study) %>%
  distinct(phylum, study) %>%
  group_by(phylum) %>% count()
## # A tibble: 4 x 2
## # Groups:   phylum [4]
##   phylum            n
##   <fct>         <int>
## 1 Cnidaria          4
## 2 Crustacea         5
## 3 Echinodermata    12
## 4 Mollusca         18
# How many studies per taxonomic group? 
data.fert %>%
  select(phylum, study, taxonomic_group) %>%
  distinct(phylum, study, taxonomic_group) %>%
  group_by(taxonomic_group) %>% count()
## # A tibble: 10 x 2
## # Groups:   taxonomic_group [10]
##    taxonomic_group     n
##    <fct>           <int>
##  1 abalone             2
##  2 clam                5
##  3 copepod             3
##  4 coral               4
##  5 mussel              4
##  6 non-copepod         2
##  7 non-urchin          2
##  8 oyster              5
##  9 scallop             3
## 10 sea urchin         11

Calculate weights for models

weights <- metafor::escalc(measure='MN',
                mi=data.fert$ave_fert_percent_p_h,
                sdi = data.fert$SD,
                ni=data.fert$number_trials_p_h, options(na.action="na.pass"))  
data.fert$w <-weights$vi

How many studies per phylum after filtering out those w/o weight calculation?

# How many studies per ~phylum? 
data.fert %>% drop_na(w) %>%
  select(phylum, study) %>%
  distinct(phylum, study) %>%
  group_by(phylum) %>% count()
## # A tibble: 4 x 2
## # Groups:   phylum [4]
##   phylum            n
##   <fct>         <int>
## 1 Cnidaria          3
## 2 Crustacea         5
## 3 Echinodermata    11
## 4 Mollusca         15

Generate binomial models

test1 <- glmmTMB(ave_fert_proport ~ p_h_experimental + taxonomic_group/phylum + (1|study), data=drop_na(data.fert, w), binomial(link = "logit"), na.action=na.exclude, weights = 1/(w+1)^2)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')
test2 <- glmmTMB(ave_fert_proport ~ p_h_experimental*phylum + (1|study), data=drop_na(data.fert, w), binomial(link = "logit"), na.action=na.exclude, weights = 1/(w+1)^2)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
test3 <- glmmTMB(ave_fert_proport ~ p_h_experimental:phylum + (1|study), data=drop_na(data.fert, w), binomial(link = "logit"), na.action=na.exclude, weights = 1/(w+1)^2)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
test4 <- glmmTMB(ave_fert_proport ~ p_h_experimental + phylum + (1|study), data=drop_na(data.fert, w), binomial(link = "logit"), na.action=na.exclude, weights = 1/(w+1)^2)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
test5 <- glmmTMB(ave_fert_proport ~ p_h_experimental + (1|study), data=drop_na(data.fert, w), binomial(link = "logit"), na.action=na.exclude, weights = 1/(w+1)^2)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
test6 <- glmmTMB(ave_fert_proport ~ 1 + (1|study), data=drop_na(data.fert, w), binomial(link = "logit"), na.action=na.exclude, weights = 1/(w+1)^2)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
test7 <- glmmTMB(ave_fert_proport ~ phylum + (1|study), data=drop_na(data.fert, w), binomial(link = "logit"), na.action=na.exclude, weights = 1/(w+1)^2)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
test8 <- glmmTMB(ave_fert_proport ~ p_h_experimental, data=drop_na(data.fert, w), binomial(link = "logit"), na.action=na.exclude, weights = 1/(w+1)^2)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

Determine best fit model from AIC values

AIC(test1, test2, test3, test4, test5, test6, test7, test8) #test2 smallest AIC.  
## Warning in AIC.default(test1, test2, test3, test4, test5, test6, test7, : models
## are not all fitted to the same number of observations
##       df      AIC
## test1 42       NA
## test2  9 77.77826
## test3  6 75.87072
## test4  6 75.25456
## test5  3 73.38311
## test6  2 88.67440
## test7  5 92.48862
## test8  2 76.37486

Examine best fit model (test2)

anova(test5, test2)
## Data: drop_na(data.fert, w)
## Models:
## test5: ave_fert_proport ~ p_h_experimental + (1 | study), zi=~0, disp=~1
## test2: ave_fert_proport ~ p_h_experimental * phylum + (1 | study), zi=~0, disp=~1
##       Df    AIC     BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)
## test5  3 73.383  84.434 -33.692   67.383                         
## test2  9 77.778 110.930 -29.889   59.778 7.6049      6     0.2685
car::Anova(test2) #phylum, pH, & phylum:pH sign. factors 
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: ave_fert_proport
##                          Chisq Df Pr(>Chisq)   
## p_h_experimental        8.0050  1   0.004665 **
## phylum                  4.5390  3   0.208839   
## p_h_experimental:phylum 3.1555  3   0.368260   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(test2)
##  Family: binomial  ( logit )
## Formula:          ave_fert_proport ~ p_h_experimental * phylum + (1 | study)
## Data: drop_na(data.fert, w)
## Weights: 1/(w + 1)^2
## 
##      AIC      BIC   logLik deviance df.resid 
##     77.8    110.9    -29.9     59.8      285 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev.
##  study  (Intercept) 0.449    0.6701  
## Number of obs: 294, groups:  study, 31
## 
## Conditional model:
##                                      Estimate Std. Error z value Pr(>|z|)
## (Intercept)                           -48.965     36.553  -1.340    0.180
## p_h_experimental                        6.317      4.643   1.360    0.174
## phylumCrustacea                        27.487     86.450   0.318    0.751
## phylumEchinodermata                    13.787     38.705   0.356    0.722
## phylumMollusca                         38.141     37.242   1.024    0.306
## p_h_experimental:phylumCrustacea       -3.626     11.659  -0.311    0.756
## p_h_experimental:phylumEchinodermata   -1.608      4.931  -0.326    0.744
## p_h_experimental:phylumMollusca        -4.620      4.745  -0.974    0.330
car::Anova(test5) #phylum, pH, & phylum:pH sign. factors 
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: ave_fert_proport
##                   Chisq Df Pr(>Chisq)   
## p_h_experimental 9.1173  1   0.002532 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(test5)
##  Family: binomial  ( logit )
## Formula:          ave_fert_proport ~ p_h_experimental + (1 | study)
## Data: drop_na(data.fert, w)
## Weights: 1/(w + 1)^2
## 
##      AIC      BIC   logLik deviance df.resid 
##     73.4     84.4    -33.7     67.4      291 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev.
##  study  (Intercept) 1.953    1.397   
## Number of obs: 294, groups:  study, 31
## 
## Conditional model:
##                  Estimate Std. Error z value Pr(>|z|)   
## (Intercept)       -22.424      7.697  -2.914  0.00357 **
## p_h_experimental    3.100      1.027   3.019  0.00253 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Generate estimates & confidence intervals (log likelihood)

confint(test5)
##                                      2.5 %    97.5 %   Estimate
## cond.(Intercept)               -37.5096933 -7.338981 -22.424337
## cond.p_h_experimental            1.0879131  5.112854   3.100384
## study.cond.Std.Dev.(Intercept)   0.5926753  3.294427   1.397328

Inspect residuals ~ fitted values

aa5 <- augment(test5, data=drop_na(data.fert, w))
## Warning in mr - fitted(object): longer object length is not a multiple of
## shorter object length
#ggplotly(
ggplot(aa5, aes(x=.fitted,y=.resid)) + 
    geom_point() + 
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing missing values (geom_point).

#)

Generate model predictions and plot against real data

ph.min.max <- drop_na(data.fert, w) %>% 
  select(phylum, p_h_experimental) %>% 
  group_by(phylum) %>% 
  summarize(min=min(p_h_experimental, na.rm=TRUE), max=max(p_h_experimental, na.rm=TRUE))
  
phylum.list <- list()
for (i in 1:nrow(ph.min.max)) {
  phylum.list[[i]] <- data.frame(ph=c(seq(from=as.numeric(ph.min.max[i,"min"]), 
                            to=as.numeric(ph.min.max[i,"max"]), 
                            by=0.01)),
                   phylum=rep(c(ph.min.max[i,"phylum"])))
}
new.data <- bind_rows(phylum.list) %>% purrr::set_names(c("p_h_experimental", "phylum"))
new.data$study <- NA 
new.data$w <- NA 

predict.test.df <- predict(test5, newdata = new.data, se.fit = TRUE, type="response")
predict.test.df.df <- predict.test.df %>%
  as.data.frame() %>%
  cbind(new.data)

#scales::show_col(c("#e41a1c","#4daf4a","#ff7f00","#984ea3",'#377eb8'))

Generate figure: pH data by phylum, each with binomial regression model fit + CI

ggplotly(
ggplot() + 
  geom_jitter(data=drop_na(data.fert, w), aes(x=p_h_experimental, y=ave_fert_proport, group=phylum, col=phylum, label=study), size=1.2, width=0.03, col="gray40") +
  facet_wrap(~phylum, scales="free") + theme_minimal() +
  ggtitle("Fertilization success ~ pH with binomial-regression model predictions") + 
  xlab("Experimental pH") + ylab("Proportion fertilization success") +
  #scale_color_manual(name=NULL, values=c("#e41a1c","#ff7f00","#4daf4a",'#377eb8')) +
  theme_minimal() + 
  coord_cartesian(ylim = c(0, 1), xlim =c(6,8.5)) +
  geom_line(data = predict.test.df.df, aes(x=p_h_experimental, y=fit), col="gray50") + #, col=phylum
  geom_ribbon(data = predict.test.df.df, aes(x=p_h_experimental, ymin=fit-se.fit, ymax=fit+se.fit), linetype=2, alpha=0.1, col="gray50") + theme(legend.position = "none")) #add fill=phylum in geom_ribbon aes if color desired 
## Warning: Ignoring unknown aesthetics: label

Figure caption:

Fertilization success (%) by experimental pH across marine taxa examined in this review. Meta-analysis was performed using a binomial regression model, and indicates that fertilization success decreases with pH across Crustacea (5 studies), Echinodermata (12 studies), and Mollusca (18 studies). Fertilization success was not significantly affected by pH in Cnidaria (4 studies). Each point reflects the average % fertilization reported by one study at an experimental pH.

Run GLMs on each phylum

  1. Are slopes significantly different from zero?
  2. If so, what is the equation?

Cnidaria

model.cnidaria <- glmmTMB(ave_fert_proport ~ p_h_experimental + (1|study), data=data.fert %>% drop_na(w) %>% filter(phylum=="Cnidaria"), binomial(link = "logit"), na.action=na.exclude,  weights = 1/(w+1)^2)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')
Anova(model.cnidaria)
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: ave_fert_proport
##                   Chisq Df Pr(>Chisq)
## p_h_experimental 2.0781  1     0.1494
summary(model.cnidaria) 
##  Family: binomial  ( logit )
## Formula:          ave_fert_proport ~ p_h_experimental + (1 | study)
## Data: data.fert %>% drop_na(w) %>% filter(phylum == "Cnidaria")
## Weights: 1/(w + 1)^2
## 
##      AIC      BIC   logLik deviance df.resid 
##       NA       NA       NA       NA       46 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance  Std.Dev.
##  study  (Intercept) 1.263e-05 0.003553
## Number of obs: 49, groups:  study, 3
## 
## Conditional model:
##                  Estimate Std. Error z value Pr(>|z|)
## (Intercept)       -48.827     34.241  -1.426    0.154
## p_h_experimental    6.273      4.351   1.442    0.149
predict.cnidaria <- predict(model.cnidaria, newdata = subset(new.data, phylum=="Cnidaria"), se.fit = TRUE, type="response")
predict.cnidaria.df <- predict.cnidaria %>%
  as.data.frame() %>%
  cbind(subset(new.data, phylum=="Cnidaria"))

ggplot() + 
  geom_jitter(data=data.fert %>% drop_na(w) %>% filter(phylum=="Cnidaria"), aes(x=p_h_experimental, y=ave_fert_proport), size=1.2, width=0.03, color="#e41a1c") +
  ggtitle("Cnidaria") + 
  xlab("Experimental pH") + ylab("Fertilization %") +
  theme_minimal() + 
  coord_cartesian(ylim = c(0, 1), xlim =c(6,8.5)) +
  geom_line(data = predict.cnidaria.df, aes(x=p_h_experimental, y=fit), col="#e41a1c") + 
  geom_ribbon(data = predict.cnidaria.df, aes(x=p_h_experimental, ymin=fit-se.fit, ymax=fit+se.fit), linetype=2, alpha=0.1, col="#e41a1c") 

Crustacea

model.crustacea <- glmmTMB(ave_fert_proport ~ p_h_experimental + (1|study), data=data.fert %>% drop_na(w) %>% filter(phylum=="Crustacea"), binomial(link = "logit"), na.action=na.exclude,  weights = 1/(w+1)^2)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in (function (start, objective, gradient = NULL, hessian = NULL, : NA/
## NaN function evaluation
## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')
Anova(model.crustacea)
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: ave_fert_proport
##                   Chisq Df Pr(>Chisq)
## p_h_experimental 0.0635  1      0.801
summary(model.crustacea) 
##  Family: binomial  ( logit )
## Formula:          ave_fert_proport ~ p_h_experimental + (1 | study)
## Data: data.fert %>% drop_na(w) %>% filter(phylum == "Crustacea")
## Weights: 1/(w + 1)^2
## 
##      AIC      BIC   logLik deviance df.resid 
##       NA       NA       NA       NA       21 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance   Std.Dev.
##  study  (Intercept) 7.022e-109 8.38e-55
## Number of obs: 24, groups:  study, 5
## 
## Conditional model:
##                  Estimate Std. Error z value Pr(>|z|)
## (Intercept)        -20.51      75.22  -0.273    0.785
## p_h_experimental     2.58      10.24   0.252    0.801
predict.crustacea <- predict(model.crustacea, newdata = subset(new.data, phylum=="Crustacea"), se.fit = TRUE, type="response")
predict.crustacea.df <- predict.crustacea %>%
  as.data.frame() %>%
  cbind(subset(new.data, phylum=="Crustacea"))

ggplot() + 
  geom_jitter(data=data.fert %>% drop_na(w) %>% filter(phylum=="Crustacea"), aes(x=p_h_experimental, y=ave_fert_proport), size=1.2, width=0.03, col="#ff7f00") +
  ggtitle("Crustacea") + 
  xlab("Experimental pH") + ylab("Fertilization %") +
  theme_minimal() + 
  coord_cartesian(ylim = c(0, 1), xlim =c(6,8.5)) +
  geom_line(data = predict.crustacea.df, aes(x=p_h_experimental, y=fit), col="#ff7f00") +
  geom_ribbon(data = predict.crustacea.df, aes(x=p_h_experimental, ymin=fit-se.fit, ymax=fit+se.fit), linetype=2, alpha=0.1, col="#ff7f00") 

Echinodermata

model.echinodermata <- glmmTMB(ave_fert_proport ~ p_h_experimental + (1|study), data=data.fert %>% drop_na(w) %>% filter(phylum=="Echinodermata"), binomial(link = "logit"), na.action=na.exclude,  weights = 1/(w+1)^2)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
Anova(model.echinodermata)
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: ave_fert_proport
##                   Chisq Df Pr(>Chisq)  
## p_h_experimental 6.3225  1    0.01192 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model.echinodermata) 
##  Family: binomial  ( logit )
## Formula:          ave_fert_proport ~ p_h_experimental + (1 | study)
## Data: data.fert %>% drop_na(w) %>% filter(phylum == "Echinodermata")
## Weights: 1/(w + 1)^2
## 
##      AIC      BIC   logLik deviance df.resid 
##     29.0     37.9    -11.5     23.0      142 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev.
##  study  (Intercept) 2.661    1.631   
## Number of obs: 145, groups:  study, 11
## 
## Conditional model:
##                  Estimate Std. Error z value Pr(>|z|)  
## (Intercept)       -41.938     16.894  -2.482   0.0131 *
## p_h_experimental    5.619      2.235   2.514   0.0119 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
predict.echinodermata <- predict(model.echinodermata, newdata = subset(new.data, phylum=="Echinodermata"), se.fit = TRUE, type="response")
predict.echinodermata.df <- predict.echinodermata %>%
  as.data.frame() %>%
  cbind(subset(new.data, phylum=="Echinodermata"))

ggplot() + 
  geom_jitter(data=data.fert %>% drop_na(w) %>% filter(phylum=="Echinodermata"), aes(x=p_h_experimental, y=ave_fert_proport), size=1.2, width=0.03, col="#4daf4a") +
  ggtitle("Echinodermata") + 
  xlab("Experimental pH") + ylab("Fertilization %") +
  theme_minimal() + 
  coord_cartesian(ylim = c(0, 1), xlim =c(6,8.5)) +
  geom_line(data = predict.echinodermata.df, aes(x=p_h_experimental, y=fit), col="#4daf4a") +
  geom_ribbon(data = predict.echinodermata.df, aes(x=p_h_experimental, ymin=fit-se.fit, ymax=fit+se.fit), linetype=2, alpha=0.1, col="#4daf4a") 

Mollusca

model.mollusca <- glmmTMB(ave_fert_proport ~ p_h_experimental + (1|study), data=data.fert %>% drop_na(w) %>% filter(phylum=="Mollusca"), binomial(link = "logit"), na.action=na.exclude,  weights = 1/(w+1)^2)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
Anova(model.mollusca)
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: ave_fert_proport
##                   Chisq Df Pr(>Chisq)  
## p_h_experimental 2.8595  1    0.09084 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model.mollusca) 
##  Family: binomial  ( logit )
## Formula:          ave_fert_proport ~ p_h_experimental + (1 | study)
## Data: data.fert %>% drop_na(w) %>% filter(phylum == "Mollusca")
## Weights: 1/(w + 1)^2
## 
##      AIC      BIC   logLik deviance df.resid 
##     19.8     26.8     -6.9     13.8       73 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance  Std.Dev.
##  study  (Intercept) 2.209e-06 0.001486
## Number of obs: 76, groups:  study, 14
## 
## Conditional model:
##                  Estimate Std. Error z value Pr(>|z|)  
## (Intercept)       -9.5492     6.4052  -1.491   0.1360  
## p_h_experimental   1.5196     0.8986   1.691   0.0908 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
predict.mollusca <- predict(model.mollusca, newdata = subset(new.data, phylum=="Mollusca"), se.fit = TRUE, type="response")
predict.mollusca.df <- predict.mollusca %>%
  as.data.frame() %>%
  cbind(subset(new.data, phylum=="Mollusca"))

ggplot() + 
  geom_jitter(data=data.fert %>% drop_na(w) %>% filter(phylum=="Mollusca"), aes(x=p_h_experimental, y=ave_fert_proport), size=1.2, width=0.03, col="#377eb8") +
  ggtitle("Mollusca") + 
  xlab("Experimental pH") + ylab("Fertilization %") +
  theme_minimal() + 
  coord_cartesian(ylim = c(0, 1), xlim =c(6,8.5)) +
  geom_line(data = predict.mollusca.df, aes(x=p_h_experimental, y=fit), col="#377eb8") + 
  geom_ribbon(data = predict.mollusca.df, aes(x=p_h_experimental, ymin=fit-se.fit, ymax=fit+se.fit), linetype=2, alpha=0.1, col="#377eb8") 
## Warning: Removed 12 rows containing missing values (geom_point).

Inspect candidate covariates - are these important experimental design factors?

convert candidate covariates to numeric

data.fert <- data.fert %>%
  mutate_at(vars(number_trials_p_h, insemination_time, sperm_per_m_l, sperm_egg_ratio, number_females, number_males), as.numeric) 
## Warning: NAs introduced by coercion

## Warning: NAs introduced by coercion

## Warning: NAs introduced by coercion

Test Control pH

car::Anova(covariates1 <- glmmTMB(ave_fert_proport ~ p_h_control*p_h_experimental, data=drop_na(data.fert, w), binomial(link = "logit"), na.action=na.exclude, weights = 1/(w+1)^2)) #no interaction 
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: ave_fert_proport
##                               Chisq Df Pr(>Chisq)   
## p_h_control                  0.0009  1   0.976649   
## p_h_experimental             9.3372  1   0.002246 **
## p_h_control:p_h_experimental 0.8145  1   0.366779   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplotly(
  ggplot(data.fert, aes(x=p_h_control, y=ave_fert_proport)) + 
    geom_jitter(aes(color = p_h_experimental, label=common_name), size=2.2, pch=19) + ggtitle("% Fertilization ~ Control pH\nsign. interaction") + 
    scale_color_gradientn(colours = rainbow(5))) 
## Warning: Ignoring unknown aesthetics: label
#    scale_color_gradient(low = "red", high = "blue"))

Test Insemination time

car::Anova(covariates2 <- glmmTMB(ave_fert_proport ~  insemination_time*p_h_experimental, data=drop_na(data.fert, w), binomial(link = "logit"), na.action=na.exclude, weights = 1/(w+1)^2)) #no interaction  
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: ave_fert_proport
##                                      Chisq Df Pr(>Chisq)    
## insemination_time                   0.8437  1  0.3583462    
## p_h_experimental                   10.8566  1  0.0009845 ***
## insemination_time:p_h_experimental  0.7914  1  0.3736759    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplotly(
  ggplot(data.fert, aes(x=insemination_time, y=ave_fert_proport)) + 
    geom_point(aes(color = p_h_experimental, label=common_name), size=2.2, pch=19) + ggtitle("% Fertilization ~ Insemination Time\nsign. interaction_") + 
    scale_color_gradientn(colours = rainbow(5))) 
## Warning: Ignoring unknown aesthetics: label
#    scale_color_gradient(low = "red", high = "blue"))

Test sperm concentration

car::Anova(covariates3 <- glmmTMB(ave_fert_proport ~ sperm_per_m_l*p_h_experimental, data=drop_na(data.fert, w), binomial(link = "logit"), na.action=na.exclude, weights = 1/(w+1)^2)) #not sign 
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: ave_fert_proport
##                                 Chisq Df Pr(>Chisq)
## sperm_per_m_l                  0.0133  1     0.9083
## p_h_experimental               1.4050  1     0.2359
## sperm_per_m_l:p_h_experimental 0.0004  1     0.9847
ggplotly(ggplot(data.fert, aes(x=sperm_per_m_l, y=ave_fert_proport)) + 
           geom_point(aes(color = p_h_experimental, label=common_name), size=2.2, pch=19) + 
           ggtitle("% Fertilization ~ Sperm Concentration\nnot sign.") + 
          scale_color_gradientn(colours = rainbow(5))) 
## Warning: Ignoring unknown aesthetics: label
#           scale_color_gradient(low = "red", high = "blue"))

Test sperm:egg ratio

car::Anova(covariates4 <- glmmTMB(ave_fert_proport ~ sperm_egg_ratio*p_h_experimental, data=drop_na(data.fert, w), binomial(link = "logit"), na.action=na.exclude, weights = 1/(w+1)^2)) #not sign 
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: ave_fert_proport
##                                   Chisq Df Pr(>Chisq)   
## sperm_egg_ratio                  0.0073  1   0.931826   
## p_h_experimental                 6.8239  1   0.008994 **
## sperm_egg_ratio:p_h_experimental 0.0000  1   0.995566   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplotly(ggplot(data.fert, aes(x=sperm_egg_ratio, y=ave_fert_proport)) + 
           geom_point(aes(color = p_h_experimental, label=common_name), size=2.2, pch=19) + 
           ggtitle("% Fertilization ~ Sperm:Egg Ratio\nnot sign.") + 
    scale_color_gradientn(colours = rainbow(5))) 
## Warning: Ignoring unknown aesthetics: label
#           scale_color_gradient(low = "red", high = "blue"))

Test no. females used for experimental eggs

car::Anova(covariates5 <- glmmTMB(ave_fert_proport ~  number_females*p_h_experimental, data=drop_na(data.fert, w), binomial(link = "logit"), na.action=na.exclude, weights = 1/(w+1)^2)) #not sign.
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: ave_fert_proport
##                                  Chisq Df Pr(>Chisq)   
## number_females                  0.0000  1   0.994787   
## p_h_experimental                9.2071  1   0.002411 **
## number_females:p_h_experimental 0.0519  1   0.819759   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplotly(ggplot(data.fert, aes(x=number_females, y=ave_fert_proport)) + 
           geom_point(aes(color = p_h_experimental, label=common_name), size=2.2, pch=19) + 
           ggtitle("% Fertilization ~ No. Females\nno interaction") + 
    scale_color_gradientn(colours = rainbow(5))) 
## Warning: Ignoring unknown aesthetics: label
#           scale_color_gradient(low = "red", high = "blue"))

Test no. males used for experimental eggs

car::Anova(covariates6 <- glmmTMB(ave_fert_proport ~  number_males*p_h_experimental, data=drop_na(data.fert, w), binomial(link = "logit"), na.action=na.exclude, weights = 1/(w+1)^2)) #not sign. 
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: ave_fert_proport
##                                Chisq Df Pr(>Chisq)   
## number_males                  0.0568  1   0.811643   
## p_h_experimental              9.1877  1   0.002436 **
## number_males:p_h_experimental 0.0420  1   0.837529   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplotly(ggplot(data.fert, aes(x=number_males, y=ave_fert_proport)) + 
           geom_point(aes(color = p_h_experimental, label=common_name), size=2.2, pch=19) + 
           ggtitle("% Fertilization ~ No. Males\nsign. interaction") + 
     scale_color_gradientn(colours = rainbow(5))) 
## Warning: Ignoring unknown aesthetics: label
#          scale_color_gradient(low = "red", high = "blue"))

BONEYARD

Old portions of the analysis that I don’t yet want to remove, but probably will in the end.

Transform proportion fertilized data to remove 1’s

Transormation equation source: https://cran.r-project.org/web/packages/betareg/vignettes/betareg.pdf, " … if y also assumes the extremes 0 and 1, a useful transformation in practice is (y · (n − 1) + 0.5)/n where n is the sample size (Smithson and Verkuilen 2006)."

HOWEVER

issue is that a few studies only had 1 trial per pH, so the transformation results in NA values

 # data.fert$ave_fert_proport.t <- data.fert$ave_fert_proport*((data.fert$number_trials_p_h-1) + 0.5) / data.fert$number_trials_p_h

Instead, I simply subtracted 0.001 from all data (but not sure if I will use that transformed data)

data.fert$ave_fert_proport.t <- data.fert$ave_fert_proport - 0.001